# ANES_coding.R

# Part of the replication archive for 
#
#   Bullock, John G. 2020. "Education and Attitudes toward Redistribution in
#   the United States." British Journal of Political Science 50.


# This file loads and codes the cumulative ANES dataset. It also merges the 
# ANES dataset with data on compulsory attendance laws and with demographic 
# and political data that are measured at the state-year level. 

# Packages. The Bullock package is looked up in the default library paths and
# in the local 'packageLibrary' directory.
library(Bullock, lib.loc = c(.libPaths(), 'packageLibrary'))   # for %IN%, split_fac(), sumNA(); also called below as Bullock::rescale()
library(car)       # for Recode()
library(dplyr)     # for %>%, rename_all(), select(), mutate(), left_join(), bind_rows(), bind_cols()
library(haven)     # for read_dta(), zap_labels()
library(stringr)   # for str_pad()

# Helper scripts. Each file is run when it is sourced. Objects that are used 
# below but not defined in this file (e.g., CSLdata, NES_StateRecode(), 
# mergeStateControlVars(), ANES_stateYoungAugmentation(), 
# ANES_codeYearsOfSchooling()) presumably come from these files -- confirm 
# against each file.
source('ANES_stateYoungAugmentation.R')
source('CSL_coding.R')        
source('ANES_coding_YearsOfSchooling.R')
source('functions/mergeStateControlVars.R')
source('functions/NES_StateRecode.R')



##############################################################################
# LOAD ANES DATA
##############################################################################
# The code below downloads the cumulative ANES from its home at 
# https://electionstudies.org. The ANES does not provide permanent links 
# (for example, DOI-based links) to any of its files, and the link included in 
# the code below will stop working when the ANES changes the URL. When that 
# happens, you need only update the link in the download.file() command to 
# correct the problem. Of course, you can also download the ANES "by hand" and 
# replace the code below with a command which loads the dataset that you've 
# downloaded.  [2019 07 14]
# Fetch the zipped Stata release of the cumulative file into a temporary file.
ANES_cumulative <- tempfile(fileext = '.zip')
download.file(
  destfile = ANES_cumulative,
  url      = 'https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_cdf_dta.zip')

# Read the .dta file straight out of the archive, upper-case every variable 
# name, keep only the variables that are recoded below, and remove labels 
# with zap_labels().
ANES <- read_dta(unz(ANES_cumulative, 'anes_timeseries_cdf_stata13.dta'))
ANES <- rename_all(ANES, toupper)
ANES <- select(
  ANES,
  VCF0004,  VCF0006A,          # year of interview, unique respondent number
  VCF0101,  VCF0104, VCF0106,  # recoded below into age, female, race
  VCF0112,  VCF0116,           # recoded below into region.contemp, employed7cat.NA3
  VCF0132,  VCF0133,           # state where "you grew up"
  VCF0140,  VCF0142,           # recoded below into educLevel, bornInUS / stateOfBirth
  VCF0806,  VCF0809,           # recoded below into govt.health.7pt, guarantee.7pt
  VCF0901B,                    # state of current residence
  VCF9013,  VCF9015)           # recoded below into ensureEqOpp, unequalProblem
ANES <- zap_labels(ANES)



##############################################################################
# CODE REQUIRED VARIABLES
##############################################################################

# OUTCOMES 
# More liberal answers have lower values, except for the binary "voteDem" 
# variables. "DK" and "no opinion" are coded as NA.
# Both outcome items treat the codes 0 and 9 as missing; every other code is
# kept as is.
ANES <- mutate(
  ANES,
  guarantee.7pt   = Recode(VCF0809, '9=NA; 0=NA'),
  govt.health.7pt = Recode(VCF0806, 'c(9,0)=NA'))


# VARIABLES NEEDED FOR DATA MERGING, IV CODING, AND CONTROL
# Interview year and respondent identifier.
ANES[['yearInt']]   <- ANES[['VCF0004']]               # year of interview
ANES[['ID.unique']] <- as.integer(ANES[['VCF0006A']])  # unique respondent number

# Interview year as an ordered factor, with 1994 recoded to 1996.
ANES[['yearInt.fac9496']] <- ANES[['yearInt']] %>%
  Recode('1994 = 1996') %>%
  ordered()
ANES[['yearInt.fac']] <- droplevels(ANES[['yearInt.fac9496']])

# Age (a code of 0 is treated as missing) and the year at which R was 14.
ANES[['age']]       <- as.integer(Recode(ANES[['VCF0101']], '0=NA'))
ANES[['yearYoung']] <- ANES[['yearInt']] - ANES[['age']] + 14

# Education level as an ordered factor. Codes 3 and 4 are pooled into 
# "HSdiploma"; any code other than 1-6 becomes NA.
ANES[['educLevel']] <- Recode(
    var       = ANES[['VCF0140']],
    recodes   = '1="max8"; 2="max12"; 3:4="HSdiploma"; 5="some college"; 6="BA or higher"; else=NA',
    as.factor = TRUE) %>%
  ordered(levels = c('max8', 'max12', 'HSdiploma', 'some college', 'BA or higher'))
table(ANES[['VCF0140']], ANES[['educLevel']])  # check the Recode

# Binary education indicators; the comparisons rely on the ordering of the 
# educLevel factor.
ANES[['college']]         <- ANES[['educLevel']] >= 'BA or higher'
ANES[['collegeAttended']] <- ANES[['educLevel']] >= 'some college'

# Sex: code 0 is missing; code 2 is coded TRUE.
ANES[['female']] <- (Recode(ANES[['VCF0104']], '0=NA') == 2)

# Race as a factor with "white" as the reference level; codes other than 
# 1-3 become NA.
ANES[['race']] <- Recode(
    var       = ANES[['VCF0106']],
    recodes   = '1="white"; 2="black"; 3="otherRace"; else=NA',
    as.factor = TRUE) %>%
  relevel('white')

# Born in the US: codes 101-199 are TRUE; 998, 999, and 0 are missing; 
# everything else is FALSE.
ANES[['bornInUS']] <- (Recode(
  var     = ANES[['VCF0142']],
  recodes = '101:199 = TRUE; c(998, 999, 000) = NA; else = FALSE') == 1)


# STATE "WHERE YOU GREW UP"  
# Start from VCF0132 (state where "you grew up"); where it is missing, fall 
# back on VCF0133 (again, state where "you grew up"). Then convert the codes 
# with NES_StateRecode().
ANES$stateYoung <- local({
  grewUp  <- ANES$VCF0132
  noReply <- is.na(grewUp)
  grewUp[noReply] <- ANES$VCF0133[noReply]
  NES_StateRecode(grewUp)
})

# Indicator for having grown up in one of the listed states.
ANES$southYoung <- with(
  ANES, 
  stateYoung %IN% qw('AL AR DE DC FL GA KY LA MD MS NC OK SC TN TX VA WV'))


# STATE OF RESIDENCE AT TIME OF INTERVIEW
# State of current residence ("HI" and 99 are set to NA) and region of 
# current residence (as a factor).
ANES <- mutate(
  ANES,
  state.contemp  = Recode(VCF0901B, 'c("HI", 99)=NA'),
  region.contemp = Recode(
    var       = VCF0112,
    recodes   = '1="Northeast"; 2="North Central"; 3="South"; 4="West"',
    as.factor = TRUE))


# STATE OF BIRTH
# Data on state of birth are in the cumulative ANES only through 1994.  They 
# are not in the 1996-2002 or 2008 time-series studies.  They -are- in the 
# 2004 time-series study, and I add those data below when I call 
# ANES_stateYoungAugmentation() (see "ANES_stateYoungAugmentation.R," which is
# sourced at the top of this file).  They are also in the 2012 time-series 
# study, but those data are only available on a restricted basis.
ANES[['stateOfBirth']] <- ANES[['VCF0142']] %>% NES_StateRecode()



##############################################################################
# MECHANISMS: EMPLOYMENT
##############################################################################
# Codes 5, 8, and 7 (retired, student, housewife) are set to NA; among the 
# remaining codes, 1 is coded TRUE and everything else FALSE.
ANES[['employed7cat.NA3']] <- (Recode(ANES[['VCF0116']], 'c(5,8,7) = NA') == 1)



##############################################################################
# MECHANISMS: ECONOMIC INDIVIDUALISM AND EQUALITY OF OPPORTUNITY
##############################################################################
# ensureEqOpp: Our society should do whatever is necessary to make sure that 
#   everyone has an equal opportunity to succeed.  
# unequalProblem: One of the big problems in this country is that we don't 
#   give everyone an equal chance.   
# ensureEqOpp is reverse-coded so that higher values = greater agreement.
# unequalProblem keeps its original direction: higher values = more 
# disagreement, i.e., belief that we already "give everyone an equal chance."
# Codes 8 and 9 are missing for both items.
ANES <- mutate(
  ANES,
  ensureEqOpp    = Recode(VCF9013, '1=5; 2=4; 4=2; 5=1; c(8,9)=NA'),
  unequalProblem = Recode(VCF9015, 'c(8,9)=NA'))


# DESERT / ECONOMIC INDIVIDUALISM (1972 ANES ONLY)
# --The 1972 ANES included ten questions about "economic individualism."  
#   Feldman (AJPS 1982, 456-58) discusses these questions.  I've listed them 
#   here in the order that Feldman lists them on page 457 of his article.
# --Higher values indicate greater belief in economic individualism, i.e., 
#   greater belief that one's wealth or poverty is one's own doing.
# --Feldman suggests that the first six questions measure belief in the 
#   existence of equal opportunity; the last four, belief in the importance of
#   work ethic.  I examine these two separate batteries, too.  
# --The first six questions are very unusual for measuring not whether equal
#   opportunity is important to respondents, but whether they think that it 
#   actually exists.  
# Fetch the zipped 1972 study into a temporary file, read the .dta file from 
# inside the archive, and remove labels with zap_labels().
ANES_1972 <- tempfile(fileext = '.zip')  
download.file(
  destfile = ANES_1972,
  url      = 'https://electionstudies.org/wp-content/uploads/2018/06/anes1972dta.zip')
ANES1972 <- read_dta(unz(ANES_1972, 'NES1972.dta'))
ANES1972 <- zap_labels(ANES1972)

# Both recodes collapse the codes 1, 2, 4, 5 onto a 1-4 scale and set 0, 8, 
# and 9 to NA. The "Rev" version reverses the direction of the scale.
recodeString    <- '1=1; 2=2; 4=3; 5=4; c(0, 8, 9)=NA' 
recodeStringRev <- '1=4; 2=3; 4=2; 5=1; c(0, 8, 9)=NA'

ANES1972 <- mutate(
  ANES1972,
  # The first six items use the forward recode.
  noEqualChance       = as.integer(Recode(V720689, recodeString)),
  seniorityHurtsPoor  = as.integer(Recode(V720692, recodeString)),
  unionsHurtPoor      = as.integer(Recode(V720694, recodeString)),
  schoolsHurtPoor     = as.integer(Recode(V720691, recodeString)),
  notEnoughJobs       = as.integer(Recode(V720687, recodeString)),
  wealthyKeepPoorDown = as.integer(Recode(V720686, recodeString)),
  # The last four items use the reversed recode.
  anyoneCanGetAJob    = as.integer(Recode(V720688, recodeStringRev)),
  poorLackAbility     = as.integer(Recode(V720690, recodeStringRev)),
  poorDontWantToWork  = as.integer(Recode(V720693, recodeStringRev)),
  poorLackAmbition    = as.integer(Recode(V720695, recodeStringRev)))

# Examine reliability of the batteries.
# --The "poor lack ability" item seems like it doesn't belong, in the sense 
#   that it doesn't speak as clearly as the others to beliefs in economic 
#   individualism.  
#   It -may- suggest that economic success is due to the individual rather than 
#   society.  But what it implies about feelings toward redistribution is 
#   quite unclear. Note too that, in Feldman's analysis (AJPS 1982, 458), it 
#   loads more weakly than any other variable onto either dimension.  And in 
#   the code below, it is the only item that reduces the reliability of the 
#   scale.  With all of this in mind, I remove it from the scale.
# Assemble each battery as a matrix with one column per item.
eqOppExists1972Battery <- do.call(cbind, as.list(ANES1972[c(
  'noEqualChance', 'seniorityHurtsPoor', 'unionsHurtPoor', 
  'schoolsHurtPoor', 'notEnoughJobs', 'wealthyKeepPoorDown')]))
workEthic1972Battery   <- do.call(cbind, as.list(ANES1972[c(
  'anyoneCanGetAJob', 'poorLackAbility', 'poorDontWantToWork', 
  'poorLackAmbition')]))
econIndiv1972Battery   <- cbind(eqOppExists1972Battery, workEthic1972Battery)

# Reliability of each battery, with and without the "poor lack ability" item
# (column 2 of the work-ethic battery, column 8 of the combined battery).
reliability(na.omit(eqOppExists1972Battery))      # .74
reliability(na.omit(workEthic1972Battery))        # .47
reliability(na.omit(
  workEthic1972Battery[, colnames(workEthic1972Battery) != 'poorLackAbility']))  # .50
reliability(na.omit(econIndiv1972Battery))        # .67
reliability(na.omit(
  econIndiv1972Battery[, colnames(econIndiv1972Battery) != 'poorLackAbility']))  # .72

# Row sums of each battery, passed through Bullock::rescale(). The work-ethic
# scale omits "poor lack ability." A respondent with any missing item gets NA.
eqOppExists <- eqOppExists1972Battery %>% 
  apply(1, sum) %>% 
  Bullock::rescale()
workEthic   <- workEthic1972Battery[, colnames(workEthic1972Battery) != 'poorLackAbility'] %>% 
  apply(1, sum) %>% 
  Bullock::rescale()


# Add the two scales to the cumulative ANES data frame, matching respondents
# on their unique ID.
# Build the ID as "1972" followed by V720002 left-padded with zeros to four 
# characters, so that it can be matched against ID.unique.
ID1972 <- as.integer(paste0(
  '1972', 
  str_pad(ANES1972$V720002, width = 4, side = 'left', pad = '0')))

# Keep every row of ANES; respondents without a match get NA on both scales.
ANES <- ANES %>% 
  left_join(
    data.frame(ID1972, eqOppExists, workEthic),
    by = c("ID.unique" = "ID1972"))



##############################################################################
# MERGE IN RESPONDENTS FROM THE 1970 ANES SUPPLEMENT
##############################################################################
# Respondents in the 1970 ANES supplement are not included in the cumulative 
# ANES.
source('ANES_1970_supplement.R')

# Extend the levels of the 1970 interview-year factor so that they also 
# include every level used in the cumulative file.
levels(ANES1970$yearInt.fac) <- union(
  levels(ANES1970$yearInt.fac), 
  levels(ANES$yearInt.fac))
ANES1970$stateYoung <- droplevels(ANES1970$stateYoung)

# Append only the 1970 respondents whose IDs are not already in ANES, and 
# only the columns that ANES already has.
ANES <- bind_rows(
  ANES, 
  local({
    notYetInANES <- !(ANES1970$ID.unique %in% ANES$ID.unique)
    sharedColumn <- colnames(ANES1970) %in% colnames(ANES)
    ANES1970[notYetInANES, sharedColumn]
  }))

  

##############################################################################
# AUGMENT THE STATE-OF-RESIDENCE-WHEN-YOUNG VARIABLE
##############################################################################
# In the cumulative ANES, state "where you grew up" is missing for several  
# years. The code in this section fills in the missing data with responses 
# from related questions. That is, it augments the "stateYoung" variable.
# lNA() is not defined in this file; presumably it reports how many values 
# are NA before the augmentation -- confirm in the Bullock package.
lNA(ANES[['stateYoung']])
ANES <- ANES %>% ANES_stateYoungAugmentation(ANES_obj = .)



##############################################################################
# SEGREGATION-RELATED MEASURES
##############################################################################
# Lochner and Moretti (2004) use measures like these.  To get the maximum 
# number of cases, these measures need to be created after the 
# state-of-residence-when-young augmentation above.
# Black respondents who were 14 in 1958 or later and who grew up in one of 
# the listed states.
ANES$blackPostBrown <- with(
  ANES, 
  race == 'black' & 
    yearYoung >= 1958 & 
    stateYoung %IN% c(
      'AL', 'AR', 'DE', 'DC', 'FL', 'GA', 'KY', 'LA', 'MD', 'MS', 'NC', 'OK', 
      'SC', 'TN', 'TX', 'VA', 'WV'))

# White respondents who grew up in Mississippi and were 14 in 1957-1982.
ANES$MSDuringRepeal <- with(
  ANES, 
  race == 'white' & yearYoung %IN% 1957:1982 & stateYoung == 'MS')

# White respondents who grew up in South Carolina and were 14 in 1956-1971.
ANES$SCDuringRepeal <- with(
  ANES, 
  race == 'white' & yearYoung %IN% 1956:1971 & stateYoung == 'SC')

# Either of the two indicators above. The race condition is repeated here 
# even though both indicators already require race == 'white'.
ANES$duringRepeal <- with(
  ANES, 
  (MSDuringRepeal | SCDuringRepeal) & race == 'white')



##############################################################################
# MERGE CUMULATIVE ANES WITH EDUCATION VARIABLES FROM YEAR-SPECIFIC 
# ANES STUDIES
##############################################################################
# The cumulative ANES only has coarse measures of years of education, e.g., 
# "more than 8 years of education but no high school diploma."  I would like 
# to have a measure of education that varies by year.  To do this, I import 
# the finer education variables that are available in many of the 
# year-specific ANES studies, e.g., the 1982 ANES.  
# 
# The ANES_codeYearsOfSchooling() function is created by
# ANES_coding_YearsOfSchooling.R, which is sourced at the start of this file.
# It returns a data frame with two columns: "ID.unique" and 
# "educYearsUncensored."
# Attach educYearsUncensored to each respondent, keeping every row of ANES.
ANES <- left_join(ANES, ANES_codeYearsOfSchooling(), by = "ID.unique")

# Derived education measures: a copy of the uncensored years, a version 
# top-coded at 13 years, and an indicator for at least "HSdiploma" on 
# educLevel.
ANES$educ      <- ANES$educYearsUncensored
ANES$yearsTo13 <- pmin(ANES$educYearsUncensored, 13)
ANES$HSgrad    <- ANES$educLevel >= 'HSdiploma'



##############################################################################
# MERGE STATE-LEVEL CHARACTERISTICS INTO ANES CUMULATIVE FILE 
##############################################################################
# Look up the state-year control variables for each respondent's 
# (stateYoung, yearYoung) pair and append them as new columns. bind_cols() 
# matches rows by position.
ANES <- mergeStateControlVars(ANES$stateYoung, ANES$yearYoung) %>% 
  bind_cols(ANES, .)



##############################################################################
# MERGE CSL INSTRUMENTS INTO ANES CUMULATIVE FILE
##############################################################################
# Match the CSL data to each ANES subject. We must include "work_age" and 
# "drop_age" as they are needed for the comparison of different instrument 
# sets that is reported in the appendix.  [2019 07 08]
# Keep every row of ANES and attach the CSL variables for the state and year
# in which the respondent was young.
ANES <- left_join(
  x  = ANES, 
  y  = CSLdata[, qw("state year CA CL work_age drop_age")],
  by = c("stateYoung" = "state", "yearYoung" = "year"))

# Store stateYoung as a factor after the join.
ANES$stateYoung <- factor(ANES$stateYoung)



##############################################################################
# SAVE CSL-MERGED ANALYSIS
##############################################################################
# Write the merged data frame to disk. saveRDS() does not create missing 
# directories, so make sure that 'data/' exists first; otherwise the script 
# would fail at its very last step, after all of the downloading and merging.
# showWarnings = FALSE suppresses the warning that dir.create() issues when 
# the directory already exists.
dir.create('data', showWarnings = FALSE)
saveRDS(ANES, file = 'data/ANES_withMergedCSLs.RDS')
